
##################################################
## Human Trafficking Indicators: A New Dataset ##
## Author: Richard W Frank                     ##
## Journal: International Interactions         ##
## Code written: April 2021                    ##
## Software: rstan ver. 2.15.1, R ver. 3.3.1   ##
## Purpose: Running static IRT model           ## 
#################################################


rm(list=ls())
library(foreign)  
library(rstan)  
library(dplyr)
library(loo)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
library(ggplot2)
 

## Change "~" to your working directory ##
setwd("~")

## load processed data
load("HTI_stan_data_prepped.RData")

#########################################################

k2_names<-c("id", "country" , "ccode","year",
            'dest',
            "ldest",
            "pdest",
            "ddest",
            "dsdest",
            "cldest", 
            "cpdest")
k5_names<-c("id", "country", "ccode","year",
            "u_vict_5", 
            "c_sex_5", 
            "c_labor_5", 
            "c_vic_5")
data_k2 <- data[k2_names] 
data_k2[5:11]<-data_k2[5:11]
data_k5<-data[k5_names]

n <- nrow(data)
#number of items in each category 
k2 <- ncol(data_k2)-4 
k5 <- ncol(data_k5)-4


#Vectorization
y_k2 <- c()
item_k2 <- c()
id_k2 <- c()
year_k2 <- c()
for(ii in (5):(k2+4)){
  y_k2 <- append(y_k2, data_k2[,ii])
  id_k2 <- append(id_k2, data_k2$id)
  year_k2 <- append(year_k2, data_k2$year)
  item_k2 <- append(item_k2, rep(ii-4, length(data_k2[,ii])))
}
id_k2 <- id_k2[!is.na(y_k2)] 
year_k2 <- year_k2[!is.na(y_k2)] 
item_k2 <- item_k2[!is.na(y_k2)] 
y_k2 <- y_k2[!is.na(y_k2)] 
 

y_k5 <- c()
item_k5 <- c()
id_k5 <- c()
year_k5 <- c()
for(ii in (5):(k5+4)){
  y_k5 <- append(y_k5, data_k5[,ii])
  id_k5 <- append(id_k5, data_k5$id)
  year_k5 <- append(year_k5, data_k5$year)
  item_k5 <- append(item_k5, rep(ii-4, length(data_k5[,ii])))
}
id_k5 <- id_k5[!is.na(y_k5)] 
year_k5 <- year_k5[!is.na(y_k5)] 
item_k5 <- item_k5[!is.na(y_k5)] 
y_k5 <- y_k5[!is.na(y_k5)] 

n_k2 <- length(y_k2)
 
n_k5 <- length(y_k5)

#########################################################
stan.data <- list(n=n, 
                  n_k2=n_k2,
                  k2=k2,
                  y_k2=y_k2 ,
                  item_k2=item_k2, 
                  id_k2=id_k2,
                  n_k5=n_k5,
                  k5=k5,
                  y_k5=y_k5,
                  item_k5=item_k5,
                  id_k5=id_k5                 
                  )

#### Build Model ####
model <- "

data {

int<lower=0> n; //number of subjects

int<lower=0> n_k2; //number of country-item observations w/ 2 cat
int<lower=0> k2; //number of items w/ 2 cat
int<lower=1,upper=2> y_k2[n_k2]; // manifest variables w/ 2 cat
int<lower=0> item_k2[n_k2]; //index of which item among 2 cat vars
int<lower=0> id_k2[n_k2]; //index of which subject for 2 cat

int<lower=0> n_k5; //number of country-item observations w/ 5 cat
int<lower=0> k5; //number of items w/ 5 cat
int<lower=1,upper=5> y_k5[n_k5]; // manifest variables w/ 5 cat
int<lower=0> item_k5[n_k5]; //index of which item among 5 cat vars
int<lower=0> id_k5[n_k5]; //index of which subject for 5 cat
}


parameters {
vector[n] x; //latent var

ordered[1] alpha_k2[k2]; //alpha for logit
vector<lower=0>[k2] beta_k2; //beta for logit

vector<lower=0>[k5] beta_k5; //betas for vars w/ 5 cat
ordered[4] c_k5[k5]; //matrix of cutpoints for vars w/ 5 cat
}

model {
for(j in 1:k2){
alpha_k2[j] ~ normal(0,10);
}
 
for(j in 1:k5){
c_k5[j,1] ~ normal(0,10); 
c_k5[j,2] ~ normal(0,10); 
c_k5[j,3] ~ normal(0,10); 
c_k5[j,4] ~ normal(0,10); 
}

beta_k2 ~ gamma(3,2); 
beta_k5 ~ gamma(3,2);

x ~ normal(0,1); 

for(ii in 1:n_k2){
y_k2[ii] ~ ordered_logistic(beta_k2[item_k2[ii]] * -x[id_k2[ii]],
alpha_k2[item_k2[ii]] ); 
}

for(ii in 1:n_k5){
y_k5[ii] ~ ordered_logistic(beta_k5[item_k5[ii]] * -x[id_k5[ii]], 
c_k5[item_k5[ii]]);
}  

}

generated quantities {
	vector[n_k2] y_hat_k2; 
	vector[n_k5] y_hat_k5;

for(ii in 1:n_k2){
		y_hat_k2[ii] = ordered_logistic_rng(beta_k2[item_k2[ii]] * -x[id_k2[ii]], 
		alpha_k2[item_k2[ii]]);
	}
for(ii in 1:n_k5){
		y_hat_k5[ii] = ordered_logistic_rng(beta_k5[item_k5[ii]] * -x[id_k5[ii]], 
		c_k5[item_k5[ii]]);
	}	
}
"
fit<- stan(model_code=model, data=stan.data, iter=2000, warmup=1000, verbose=T,
           pars=c("x", "alpha_k2", "beta_k2","c_k5", "beta_k5", "y_hat_k5", "y_hat_k5"),
           seed=123456789, chains=4, cores=4, control=list(adapt_delta=.95)) 

## once it is run then we save various parts of the output for future study
save.image("hti_static.RData")
output <- extract(fit, permuted = TRUE)
save(output, file="hti_static.RData")
